From 9ab74b250b00062ca051a03d4df0cd7b549e6585 Mon Sep 17 00:00:00 2001 From: "emellor@leeni.uk.xensource.com" Date: Fri, 9 Dec 2005 10:49:29 +0000 Subject: [PATCH] Add one more fork to the Xend start-up process, this time with the parent staying alive to watch the child. If the child crashes, then the parent starts another one in its place. This provides robustness in the face of crashes like one currently tracked as bug #411. Tidy up chunks of SrvDaemon.py on the way past. Signed-off-by: Ewan Mellor --- tools/python/xen/xend/server/SrvDaemon.py | 165 +++++++++++++--------- tools/python/xen/xend/server/SrvServer.py | 8 +- 2 files changed, 102 insertions(+), 71 deletions(-) diff --git a/tools/python/xen/xend/server/SrvDaemon.py b/tools/python/xen/xend/server/SrvDaemon.py index ff0a4ca9be..29e4c21ef0 100644 --- a/tools/python/xen/xend/server/SrvDaemon.py +++ b/tools/python/xen/xend/server/SrvDaemon.py @@ -23,73 +23,37 @@ import relocate from params import * +XEND_PROCESS_NAME = 'xend' + + class Daemon: """The xend daemon. """ def __init__(self): - self.shutdown = 0 - self.traceon = 0 + self.traceon = False self.tracefile = None self.traceindent = 0 self.child = 0 - - def read_pid(self, pidfile): - """Read process id from a file. - - @param pidfile: file to read - @return pid or 0 - """ - if os.path.isfile(pidfile) and os.path.getsize(pidfile): - try: - f = open(pidfile, 'r') - try: - return int(f.read()) - finally: - f.close() - except: - return 0 - else: - return 0 - def find_process(self, pid, name): - """Search for a process. - @param pid: process id - @param name: process name - @return: pid if found, 0 otherwise - """ - running = 0 - if pid: - lines = os.popen('ps %d 2>/dev/null' % pid).readlines() - exp = '^ *%d.+%s' % (pid, name) - for line in lines: - if re.search(exp, line): - running = pid - break - return running - - def cleanup_process(self, pidfile, name, kill): - """Clean up the pidfile for a process. + def cleanup_xend(self, kill): + """Clean up the Xend pidfile. If a running process is found, kills it if 'kill' is true. - @param pidfile: pid file - @param name: process name @param kill: whether to kill the process @return running process id or 0 """ running = 0 - pid = self.read_pid(pidfile) - if self.find_process(pid, name): + pid = read_pid(XEND_PID_FILE) + if find_process(pid, XEND_PROCESS_NAME): if kill: - os.kill(pid, 1) + os.kill(pid, signal.SIGTERM) else: running = pid - if running == 0 and os.path.isfile(pidfile): - os.remove(pidfile) + if running == 0 and os.path.isfile(XEND_PID_FILE): + os.remove(XEND_PID_FILE) return running - def cleanup_xend(self, kill): - return self.cleanup_process(XEND_PID_FILE, "xend", kill) def status(self): """Returns the status of the xend daemon. @@ -97,15 +61,15 @@ class Daemon: 0 Running 3 Not running """ - if self.cleanup_process(XEND_PID_FILE, "xend", False) == 0: + if self.cleanup_xend(False) == 0: return 3 else: return 0 - def fork_pid(self, pidfile): - """Fork and write the pid of the child to 'pidfile'. - @param pidfile: pid file + def fork_pid(self): + """Fork and write the pid of the child to XEND_PID_FILE. + @return: pid of child in parent, 0 in child """ @@ -113,7 +77,7 @@ class Daemon: if self.child: # Parent - pidfile = open(pidfile, 'w') + pidfile = open(XEND_PID_FILE, 'w') try: pidfile.write(str(self.child)) finally: @@ -121,6 +85,7 @@ class Daemon: return self.child + def daemonize(self): if not XEND_DAEMONIZE: return @@ -132,9 +97,9 @@ class Daemon: # Fork, this allows the group leader to exit, # which means the child can never again regain control of the # terminal - if self.fork_pid(XEND_PID_FILE): - self.exit() - + if os.fork(): + os._exit(0) + # Detach from standard file descriptors, and redirect them to # /dev/null or the log as appropriate. os.close(0) @@ -189,7 +154,41 @@ class Daemon: # Child self.daemonize() self.tracing(trace) - self.run(os.fdopen(w, 'w')) + + # If Xend proper segfaults, then we want to restart it. Thus, + # we fork a child for running Xend itself, and if it segfaults + # (or exits any way other than cleanly) then we run it again. + # The first time through we want the server to write to the (r,w) + # pipe created above, so that we do not exit until the server is + # ready to receive requests. All subsequent restarts we don't + # want this behaviour, or the pipe will eventually fill up, so + # we just pass None into run in subsequent cases (by clearing w + # in the parent of the first fork). + while True: + pid = self.fork_pid() + if pid: + os.close(w) + w = False + + (_, status) = os.waitpid(pid, 0) + + if os.WIFEXITED(status): + code = os.WEXITSTATUS(status) + log.info('Xend exited with status %d.', code) + sys.exit(code) + + if os.WIFSIGNALED(status): + sig = os.WTERMSIG(status) + + if sig in (signal.SIGINT, signal.SIGTERM): + log.info('Xend stopped due to signal %d.', sig) + sys.exit(0) + else: + log.fatal( + 'Xend died due to signal %d! Restarting it.', + sig) + else: + self.run(w and os.fdopen(w, 'w') or None) return ret @@ -290,18 +289,11 @@ class Daemon: if XEND_DEBUG: traceback.print_exc() log.exception("Exception starting xend (%s)" % ex) - status.write('1') - status.close() - self.exit(1) + if status: + status.write('1') + status.close() + sys.exit(1) - def exit(self, rc=0): - # Calling sys.exit() raises a SystemExit exception, which only - # kills the current thread. Calling os._exit() makes the whole - # Python process exit immediately. There doesn't seem to be another - # way to exit a Python with running threads. - #sys.exit(rc) - os._exit(rc) - def instance(): global inst try: @@ -311,10 +303,47 @@ def instance(): return inst +def read_pid(pidfile): + """Read process id from a file. + + @param pidfile: file to read + @return pid or 0 + """ + if os.path.isfile(pidfile) and os.path.getsize(pidfile): + try: + f = open(pidfile, 'r') + try: + return int(f.read()) + finally: + f.close() + except: + return 0 + else: + return 0 + + +def find_process(pid, name): + """Search for a process. + + @param pid: process id + @param name: process name + @return: pid if found, 0 otherwise + """ + running = 0 + if pid: + lines = os.popen('ps %d 2>/dev/null' % pid).readlines() + exp = '^ *%d.+%s' % (pid, name) + for line in lines: + if re.search(exp, line): + running = pid + break + return running + + def main(argv = None): global XEND_DAEMONIZE - XEND_DAEMONIZE = 0 + XEND_DAEMONIZE = False if argv is None: argv = sys.argv diff --git a/tools/python/xen/xend/server/SrvServer.py b/tools/python/xen/xend/server/SrvServer.py index 576de6be16..38a06be9e3 100644 --- a/tools/python/xen/xend/server/SrvServer.py +++ b/tools/python/xen/xend/server/SrvServer.py @@ -68,7 +68,8 @@ class XendServers: # Running the network script will spawn another process, which takes # the status fd with it unless we set FD_CLOEXEC. Failing to do this # causes the read in SrvDaemon to hang even when we have written here. - fcntl.fcntl(status, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + if status: + fcntl.fcntl(status, fcntl.F_SETFD, fcntl.FD_CLOEXEC) Vifctl.network('start') threads = [] @@ -93,8 +94,9 @@ class XendServers: if threads_left: time.sleep(.5) - status.write('0') - status.close() + if status: + status.write('0') + status.close() for t in threads: t.join() -- 2.30.2